In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
In [2]:
# Load the mtcars dataset, then print a preview of the first rows followed by its shape.
df1 = pd.read_csv('mtcars.csv')
for preview in (df1.head(), df1.shape):
    print(preview)
               model   mpg  cyl   disp   hp     wt   qsec  vs  am  gear  carb
0          Mazda RX4  21.0    6  160.0  110  2.620  16.46   0   1     4     4
1      Mazda RX4 Wag  21.0    6  160.0  110  2.875  17.02   0   1     4     4
2         Datsun 710  22.8    4  108.0   93  2.320  18.61   1   1     4     1
3     Hornet 4 Drive  21.4    6  258.0  110  3.215  19.44   1   0     3     1
4  Hornet Sportabout  18.7    8  360.0  175  3.440  17.02   0   0     3     2
(32, 11)

Line Plot

In [3]:
# Draw a linear series, a quadratic series and a vertical reference line on one
# figure, save it to disk and display it.
#plt.style.use('fivethirtyeight')  # alternative style
plt.style.use('classic')
x1 = np.arange(0, 25)
y1 = x1 * 2
y2 = x1 ** 2

plt.plot(x1, y1, color='yellow', linestyle='dashed', marker='o', label='linear plot')
# y2 is x1 squared, so the legend calls it quadratic rather than exponential.
plt.plot(x1, y2, color='blue', linestyle='solid', marker='>', label='quadratic plot')
# 600 points at x = 1 form a vertical line; it gets its own legend entry
# instead of repeating the label of the previous series.
plt.plot([1]*600, list(range(600)), color='#ff00ff', linestyle='dotted', marker='>', label='vertical line')
plt.xlabel('x_label')
plt.ylabel('y_label')
plt.title("first plot")
plt.legend(loc=0)

# Save before show().
plt.savefig("myfirstplot.png")
plt.show()
In [4]:
# Plot the linear series on a 10x8 figure created at dpi=50, then save it at dpi=200.
line_style = dict(color='yellow', linestyle='dashed', marker='o', label='linear plot')
plt.figure(figsize=(10, 8), dpi=50)
plt.plot(x1, y1, **line_style)
plt.savefig("myfirstplot2.png", dpi=200)
In [5]:
# Sort by the x-axis column (wt) so the line runs left to right; sorting by mpg
# makes the line jump back and forth along the x-axis.
df1.sort_values(by='wt').plot(x='wt', y='mpg', kind='line')
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0xb8a1f08>
In [6]:
# Same mpg-vs-weight line drawn with pyplot. Rows are ordered by the x-axis
# column (wt) so consecutive points are joined left to right.
df2 = df1.sort_values(by='wt')
plt.plot(df2.wt, df2.mpg)
# Equivalent call using column names: plt.plot('wt', 'mpg', data=df2)
Out[6]:
[<matplotlib.lines.Line2D at 0xb967248>]

Scatter Plot

In [7]:
# Scatter of the squared series (y2) against x1.
plt.scatter(x=x1, y=y2)
Out[7]:
<matplotlib.collections.PathCollection at 0xb9a8e48>
In [8]:
# Scatter of mpg against weight.
plt.scatter(df2['wt'], df2['mpg'])
Out[8]:
<matplotlib.collections.PathCollection at 0xb84f108>
In [9]:
# Scatter of mpg against the number of gears.
plt.scatter(x=df1['gear'], y=df1['mpg'])
Out[9]:
<matplotlib.collections.PathCollection at 0xb928948>
In [10]:
# Number of cars for each gear count.
df1['gear'].value_counts()
Out[10]:
3    15
4    12
5     5
Name: gear, dtype: int64
In [11]:
# Scatter of horsepower against the number of gears, via the pandas plotting accessor.
df1.plot.scatter(x='gear', y='hp')
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0xbb76cc8>

Bar Plot

In [12]:
# Vertical bar chart of four sample stock values.
stockname = ['goog', 'ibm', 'tcs', 'hll']
stockvalues = [1000, 2000, 3000, 4000]
# Colors are given as a list: the two-letter string 'rg' is not a valid color
# in current matplotlib, whereas a list of colors is cycled across the bars.
plt.bar(stockname, stockvalues, color=['r', 'g'], width=0.75, align='center')
Out[12]:
<BarContainer object of 4 artists>
In [13]:
# Horizontal version of the stock bar chart. Colors are passed as a list because
# the two-letter string 'rg' is not a valid color in current matplotlib.
plt.barh(stockname, stockvalues, color=['r', 'g'])
Out[13]:
<BarContainer object of 4 artists>
In [14]:
# Bar chart of cars per gear count, with each bar's count written at its top.
vc = df1['gear'].value_counts()
print(vc)
vc.plot(kind='bar')
# Bars sit at positions 0, 1, 2, ...; place each label at (position, bar height).
for position, count in enumerate(vc.values):
    print(position, count)
    plt.text(position, count, str(count))
3    15
4    12
5     5
Name: gear, dtype: int64
0 15
1 12
2 5

Histogram

In [15]:
# Fixed seed so the sample, and therefore the histogram, is identical on every run.
rng = np.random.RandomState(42)
agevalues = rng.randint(0, 101, size=100)
print(agevalues)
# Side-by-side histograms of the sample and of the doubled sample over 10 shared bins.
plt.hist([agevalues, agevalues*2], bins=10, rwidth=0.9, stacked=False, color=['r', 'g'])
[ 77  65  80  41   7   5  51  82  15   2  84  16  60  21  30  56  62  26
  24  82  47  53  68  52  37  44  97  50  27  50  38   7  96   4  81  98
  88   3  31  55  66  78  49  98  44  40   6  58  46  32  29  14  53  58
  81  67   4  10   9  77  37  57  29  93  24  33  72 100  77  72  51  71
  37  96  74   8  74  93  45  61  82  78  19  28  70  74  98  34  82  65
  46  53   4  35  79  45  80  10  73  52]
Out[15]:
([array([18., 19., 24., 24., 15.,  0.,  0.,  0.,  0.,  0.]),
  array([13.,  4.,  9., 10., 11., 13.,  8., 15.,  8.,  9.])],
 array([  2. ,  21.8,  41.6,  61.4,  81.2, 101. , 120.8, 140.6, 160.4,
        180.2, 200. ]),
 <a list of 2 Lists of Patches objects>)
In [16]:
# Histogram of the sample and the doubled sample, stacked per bin.
plt.hist(x=[agevalues, 2 * agevalues], bins=10, rwidth=0.9, stacked=True)
Out[16]:
([array([18., 19., 24., 24., 15.,  0.,  0.,  0.,  0.,  0.]),
  array([31., 23., 33., 34., 26., 13.,  8., 15.,  8.,  9.])],
 array([  2. ,  21.8,  41.6,  61.4,  81.2, 101. , 120.8, 140.6, 160.4,
        180.2, 200. ]),
 <a list of 2 Lists of Patches objects>)
In [17]:
# Histogram of mpg via the pandas plotting accessor.
df1.plot.hist(y='mpg')
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0xbe3b3c8>
In [18]:
# Kernel density estimate of mpg via the pandas plotting accessor.
df1.plot.kde(y='mpg')
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0xcfc0e48>

Box Plot

In [19]:
# Box plot of 100..199 plus three extra values (10, 300, 400) far outside that range.
plt.style.use('ggplot')
v1 = list(range(100, 200)) + [10, 300, 400]
plt.boxplot(v1)
plt.show()
In [20]:
# Box plot of horsepower via the pandas plotting accessor.
df1['hp'].plot.box()
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0xcf3fd48>
In [21]:
# Rows whose horsepower exceeds 300.
df1.loc[df1['hp'] > 300]
Out[21]:
model mpg cyl disp hp wt qsec vs am gear carb
30 Maserati Bora 15.0 8 301.0 335 3.57 14.6 0 1 5 8

Pie Chart

In [22]:
# Cars per gear count; reused by the pie charts below.
vc = df1['gear'].value_counts()
print(vc)
3    15
4    12
5     5
Name: gear, dtype: int64
In [23]:
# Pie chart of cars per gear count, each wedge annotated with its percentage.
gear_counts, gear_labels = vc.values, vc.index
plt.pie(gear_counts, labels=gear_labels, autopct='%.1f')
# Trailing print() so the cell ends on a statement rather than on pie()'s return value.
print()

In [24]:
# Same pie chart with the third wedge pulled out of the circle and a custom legend.
wedge_offsets = [0, 0, 0.25]
plt.pie(vc.values, labels=vc.index, autopct='%.1f', explode=wedge_offsets)
plt.legend(['abc', 'def', 'ghi'])
# Trailing print() so the cell ends on a statement rather than on legend()'s return value.
print()

sns Bar Plot

In [25]:
# Single bar showing the mean of three values with a 95% confidence interval.
# The vector is passed as x= explicitly: older seaborn reads the first positional
# argument as x, while newer releases read it as data.
sns.barplot(x=[100, 200, 300], ci=95, estimator=np.mean)
Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0xd0ebac8>
In [26]:
# Bar of mean mpg, followed by three summary numbers for the same column.
sns.barplot(x='mpg', data=df1)

# 2.5th and 97.5th percentiles of the raw mpg values (the central 95% range).
# This is the spread of the data itself, not a confidence interval of the mean.
mpg_range_95 = np.nanpercentile(df1.mpg, (2.5, 97.5))
print(mpg_range_95)

mpg_std = np.std(df1.mpg)
print(mpg_std)

mpg_mean = np.mean(df1.mpg)
print(mpg_mean)
[10.4    32.7375]
5.932029552301219
20.090624999999996
In [27]:
# Single bar showing the mean of five values. The vector is passed as x= explicitly:
# older seaborn reads the first positional argument as x, newer releases as data.
sns.barplot(x=[100, 200, 300, 400, 500])
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0xd14f248>
In [28]:
# Bar plot of mpg for each gear count.
sns.barplot(data=df1, x='gear', y='mpg')
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0xd0a8308>
In [29]:
# Bar plot of mpg for each gear count, split into one bar per cylinder count.
sns.barplot(data=df1, x='gear', y='mpg', hue='cyl')
Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0xced8648>
In [30]:
# Count of mpg values for every (gear, cyl) combination: gears as rows, cylinders as columns.
dp1 = pd.pivot_table(df1, values='mpg', index='gear', columns='cyl', aggfunc='count')
print(dp1)
cyl     4    6     8
gear                
3     1.0  2.0  12.0
4     8.0  4.0   NaN
5     2.0  1.0   2.0

sns Scatter Plots

In [31]:
# Scatter of mpg against gear count, colored by cylinder count.
sns.scatterplot(data=df1, x='gear', y='mpg', hue='cyl')
Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0xceec688>
In [32]:
# Scatter of cylinder count against gear count.
sns.scatterplot(data=df1, x='gear', y='cyl')
Out[32]:
<matplotlib.axes._subplots.AxesSubplot at 0xd19f208>
In [33]:
# Strip plot of cylinder count for each gear count.
sns.stripplot(data=df1, x='gear', y='cyl')
Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0xd20db88>
In [34]:
# Swarm plot of cylinder count for each gear count.
sns.swarmplot(data=df1, x='gear', y='cyl')
Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0xd2e2ec8>

Regression Plots

In [35]:
# Regression plot of mpg against weight, titled on the axes that regplot returns.
reg_ax = sns.regplot(x='wt', y='mpg', data=df1)
reg_ax.set_title('regplot')
Out[35]:
Text(0.5, 1.0, 'regplot')
In [36]:
# Figure-level regression plot of mpg against weight.
sns.lmplot(x='wt', y='mpg', data=df1)
Out[36]:
<seaborn.axisgrid.FacetGrid at 0xd3dbc48>

sns Distribution

In [37]:
# Distribution of mpg: 5-bin histogram with a KDE curve and a rug.
# NOTE(review): distplot is deprecated in newer seaborn releases - confirm the target version.
hist_style = {'rwidth': 0.5, 'color': 'y'}
sns.distplot(df1['mpg'], bins=5, hist=True, kde=True, rug=True, hist_kws=hist_style)
Out[37]:
<matplotlib.axes._subplots.AxesSubplot at 0xdb07c08>

sns Box Plots (IQR)

In [38]:
# Horizontal box plot of mpg. The column is passed as x= explicitly: older seaborn
# reads the first positional argument as x, while newer releases read it as data.
sns.boxplot(x=df1.mpg)
Out[38]:
<matplotlib.axes._subplots.AxesSubplot at 0xdb8fb48>
In [39]:
# Vertical box plot of mpg. Assigning the column to y gives a vertical box directly,
# without relying on orient='v' combined with a positional vector.
sns.boxplot(y=df1.mpg)
Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0xdc0da08>
In [40]:
# Box plot of mpg for each gear count.
sns.boxplot(x='gear', y='mpg', data=df1)
Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0xf1aa188>
In [41]:
# Box plot of mpg for each gear count, split by the am column.
sns.boxplot(x='gear', y='mpg', hue='am', data=df1)
Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0xe104688>
In [42]:
# Boxen plot of mpg for each gear count, split by the am column.
sns.boxenplot(x='gear', y='mpg', hue='am', data=df1)
Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0xf1fc7c8>
In [43]:
# Violin plot of mpg for each gear count, split by the am column.
sns.violinplot(x='gear', y='mpg', hue='am', data=df1)
Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0xf3c8088>

Multi plots

In [44]:
# Pairwise plots for three columns: mpg, gear and cyl.
pair_columns = ['mpg', 'gear', 'cyl']
sns.pairplot(df1[pair_columns])
Out[44]:
<seaborn.axisgrid.PairGrid at 0xf2f9888>
In [45]:
# Joint plot of mpg against gear count, with separate styling for the
# marginal plots and for the central joint plot.
marginal_style = {'bins': 10, 'kde': True, 'color': 'y'}
joint_style = {'color': 'b'}
sns.jointplot(x='gear', y='mpg', data=df1, joint_kws=joint_style, marginal_kws=marginal_style)
Out[45]:
<seaborn.axisgrid.JointGrid at 0xf99d348>
In [46]:
pd.set_option('display.width', 1000)
# Heat map input: pairwise correlations between the numeric columns.
# The text column 'model' is excluded explicitly, because recent pandas versions
# raise on non-numeric columns in corr() instead of silently dropping them.
cmatrix = df1.select_dtypes(include='number').corr()
cmatrix
Out[46]:
mpg cyl disp hp wt qsec vs am gear carb
mpg 1.000000 -0.852162 -0.847551 -0.776168 -0.867659 0.418684 0.664039 0.599832 0.480285 -0.550925
cyl -0.852162 1.000000 0.902033 0.832447 0.782496 -0.591242 -0.810812 -0.522607 -0.492687 0.526988
disp -0.847551 0.902033 1.000000 0.790949 0.887980 -0.433698 -0.710416 -0.591227 -0.555569 0.394977
hp -0.776168 0.832447 0.790949 1.000000 0.658748 -0.708223 -0.723097 -0.243204 -0.125704 0.749812
wt -0.867659 0.782496 0.887980 0.658748 1.000000 -0.174716 -0.554916 -0.692495 -0.583287 0.427606
qsec 0.418684 -0.591242 -0.433698 -0.708223 -0.174716 1.000000 0.744535 -0.229861 -0.212682 -0.656249
vs 0.664039 -0.810812 -0.710416 -0.723097 -0.554916 0.744535 1.000000 0.168345 0.206023 -0.569607
am 0.599832 -0.522607 -0.591227 -0.243204 -0.692495 -0.229861 0.168345 1.000000 0.794059 0.057534
gear 0.480285 -0.492687 -0.555569 -0.125704 -0.583287 -0.212682 0.206023 0.794059 1.000000 0.274073
carb -0.550925 0.526988 0.394977 0.749812 0.427606 -0.656249 -0.569607 0.057534 0.274073 1.000000
In [47]:
# Annotated heat map of the correlation matrix on a 16x12 figure.
plt.figure(figsize=(16, 12))
sns.heatmap(data=cmatrix, annot=True)
Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0xfb75e48>

Subplotting

In [48]:
# Build a 3x3 grid of axes; only the top-left 2x2 block is populated below.
fig, ax = plt.subplots(3, 3, figsize=(10, 12), dpi=100)

# Title and x-label each used panel with its row/column position ('00', 'x00', ...).
for row in range(2):
    for col in range(2):
        position = str(row) + str(col)
        ax[row][col].set_title(position)
        ax[row][col].set_xlabel('x' + position)

ax[0][0].scatter(df1.gear, df1.mpg)
ax[0][1].hist(df1.mpg)
# One bar per cylinder count (mean mpg). Passing the raw columns would draw every
# car's bar at its cyl position, so the bars would overlap each other.
mpg_by_cyl = df1.groupby('cyl')['mpg'].mean()
ax[1][0].bar(mpg_by_cyl.index, mpg_by_cyl.values)
ax[1][1].scatter(df1.carb, df1.mpg)
plt.show()
In [49]:
# Show the grid of Axes objects returned by plt.subplots(3, 3, ...).
print(ax)
[[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FF44208>
  <matplotlib.axes._subplots.AxesSubplot object at 0x000000000FF3CF48>
  <matplotlib.axes._subplots.AxesSubplot object at 0x000000000FFA6308>]
 [<matplotlib.axes._subplots.AxesSubplot object at 0x000000000FFDF4C8>
  <matplotlib.axes._subplots.AxesSubplot object at 0x0000000010018688>
  <matplotlib.axes._subplots.AxesSubplot object at 0x000000001004E888>]
 [<matplotlib.axes._subplots.AxesSubplot object at 0x00000000100899C8>
  <matplotlib.axes._subplots.AxesSubplot object at 0x00000000102E1C48>
  <matplotlib.axes._subplots.AxesSubplot object at 0x0000000010318E48>]]
In [50]:
# Show the Figure object returned by plt.subplots.
print(fig)
Figure(1000x1200)
In [51]:
# Replace the title of the top-left axes in the grid.
ax[0, 0].set_title('hello')
Out[51]:
Text(0.5, 1, 'hello')

Plotly

In [52]:
# Plotly setup: initialise plotly's offline notebook mode, then import the
# express (px) and graph_objects (go) APIs.
# NOTE(review): `go` is not used in the cells visible here - confirm it is needed.
import plotly
plotly.offline.init_notebook_mode()
import plotly.express as px
import plotly.graph_objects as go
In [53]:
# Interactive box plot of mpg.
fig = px.box(df2, y='mpg')
fig.show()
In [54]:
# Interactive scatter of mpg against cylinder count.
fig = px.scatter(df1, x='cyl', y='mpg')
fig.show()
In [55]:
# Interactive box plot of 100..199 plus three extra values (10, 300, 400).
v1 = [*range(100, 200), 10, 300, 400]
fig = px.box(y=v1)
fig.show()
In [56]:
# Interactive histogram of mpg with at most 10 bins.
fig = px.histogram(df1, x='mpg', title='histplot', nbins=10)
fig.show()
In [57]:
# Interactive histogram of horsepower; the figure is the cell's last expression.
px.histogram(df1, x='hp', title='histplot', nbins=10)
In [ ]: